package xin.nick;

import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import fr.opensagres.xdocreport.core.io.internal.ByteArrayOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * @author Nick
 * @since 2022/6/22/022
 */
public class WordToHtml {

    public static void main(String[] args) throws IOException, TransformerException, ParserConfigurationException {

        WordToHtml wordToHtml = new WordToHtml();
        String wordFileName = "123.docx";
        String htmlFileName = "123.html";
        String docxToHtml = wordToHtml.docxToHtml(wordFileName);
        FileUtils.writeStringToFile(new File(htmlFileName), docxToHtml, "utf-8");
    }

    public String docToHtml(String docFilePath) throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(new FileInputStream(docFilePath));
        WordToHtmlConverter wordToHtmlConverter = new ImageConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
        );
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer serializer = transformerFactory.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        out.close();
        String result = new String(out.toByteArray());

        return result;
    }


    // docx转换html
    public String docxToHtml(String fileName) throws IOException {
        XWPFDocument docxDocument = new XWPFDocument(new FileInputStream(fileName));
        XHTMLOptions options = XHTMLOptions.create();
        // 图片转base64
        options.setImageManager(new Base64EmbedImgManager());
        // 转换htm11
        ByteArrayOutputStream htmlStream = new ByteArrayOutputStream();
        XHTMLConverter.getInstance().convert(docxDocument, htmlStream, options);
        String htmlStr = htmlStream.toString();
        return htmlStr;
    }
    
}
